pull: Redo logic for "scanning"
authorColin Walters <walters@verbum.org>
Tue, 1 Nov 2016 17:51:55 +0000 (13:51 -0400)
committerAtomic Bot <atomic-devel@projectatomic.io>
Wed, 16 Nov 2016 22:17:25 +0000 (22:17 +0000)
What in the code is called "scanning" is ensuring (potentially
recursively) have an object, and if not, fetching it.  And then if
it's metadata, parsing it and finding new objects to fetch.

This logic has grown fairly complex.  What I'm trying to fix
right now is that if we're doing a pull-local to a remote repository
via `sshfs` (FUSE) we still end up scanning, which is inefficient.

We can take advantage of the "commitpartial" logic here - if a commit
isn't partial, it's complete, hence we don't need to scan it.

At the same time, I'm changing the logic here to *always* do scans for
dirtree objects.  This will fix cases where multiple commits share
dirtree objects.  We have "commitpartial" metadata, but no such concept
of partial/complete for dirtrees.

But, we'll only ever scan dirtrees if we scan commits, which is
what the section above fixes.

Closes: https://github.com/ostreedev/ostree/issues/543
Closes: #564
Approved by: alexlarsson

src/libostree/ostree-repo-pull.c

index 183812cc2dd13b69d6a080eb141e7bfc8c910e71..fd35e87a8b40dec3497e941a84017a2dab7fff8c 100644 (file)
@@ -102,7 +102,6 @@ typedef struct {
   gboolean          is_untrusted;
 
   GPtrArray        *dirs;
-  gboolean      commitpartial_exists;
 
   gboolean      have_previous_bytes;
   guint64       previous_bytes_sec;
@@ -1036,11 +1035,17 @@ scan_commit_object (OtPullData         *pull_data,
                     GError            **error)
 {
   gboolean ret = FALSE;
+  /* If we found a legacy transaction flag, assume we have to scan.
+   * We always do a scan of dirtree objects; see
+   * https://github.com/ostreedev/ostree/issues/543
+   */
+  OstreeRepoCommitState commitstate;
   g_autoptr(GVariant) commit = NULL;
   g_autoptr(GVariant) parent_csum = NULL;
   const guchar *parent_csum_bytes = NULL;
   gpointer depthp;
   gint depth;
+  gboolean is_partial;
 
   if (recursion_depth > OSTREE_MAX_RECURSION)
     {
@@ -1089,6 +1094,13 @@ scan_commit_object (OtPullData         *pull_data,
         }
     }
 
+  if (!ostree_repo_load_commit (pull_data->repo, checksum, &commit, &commitstate, error))
+    goto out;
+
+  /* If we found a legacy transaction flag, assume all commits are partial */
+  is_partial = pull_data->legacy_transaction_resuming
+    || (commitstate & OSTREE_REPO_COMMIT_STATE_PARTIAL) > 0;
+
   if (!ostree_repo_load_variant (pull_data->repo, OSTREE_OBJECT_TYPE_COMMIT, checksum,
                                  &commit, error))
     goto out;
@@ -1137,7 +1149,11 @@ scan_commit_object (OtPullData         *pull_data,
         }
     }
 
-  if (!pull_data->is_commit_only)
+  /* We only recurse to looking whether we need dirtree/dirmeta
+   * objects if the commit is partial, and we're not doing a
+   * commit-only fetch.
+   */
+  if (is_partial && !pull_data->is_commit_only)
     {
       g_autoptr(GVariant) tree_contents_csum = NULL;
       g_autoptr(GVariant) tree_meta_csum = NULL;
@@ -1251,8 +1267,11 @@ scan_one_metadata_object_c (OtPullData         *pull_data,
       do_fetch_detached = (objtype == OSTREE_OBJECT_TYPE_COMMIT);
       enqueue_one_object_request (pull_data, tmp_checksum, objtype, path, do_fetch_detached, FALSE);
     }
-  else if (objtype == OSTREE_OBJECT_TYPE_COMMIT && pull_data->is_commit_only)
+  else if (is_stored && objtype == OSTREE_OBJECT_TYPE_COMMIT)
     {
+      /* For commits, always refetch detached metadata. */
+      enqueue_one_object_request (pull_data, tmp_checksum, objtype, path, TRUE, TRUE);
+
       if (!scan_commit_object (pull_data, tmp_checksum, recursion_depth,
                                pull_data->cancellable, error))
         goto out;
@@ -1260,59 +1279,12 @@ scan_one_metadata_object_c (OtPullData         *pull_data,
       g_hash_table_insert (pull_data->scanned_metadata, g_variant_ref (object), object);
       pull_data->n_scanned_metadata++;
     }
-  else if (is_stored)
+  else if (is_stored && objtype == OSTREE_OBJECT_TYPE_DIR_TREE)
     {
-      gboolean do_scan = pull_data->legacy_transaction_resuming || is_requested || pull_data->commitpartial_exists;
-
-      /* For commits, always refetch detached metadata. */
-      if (objtype == OSTREE_OBJECT_TYPE_COMMIT)
-        enqueue_one_object_request (pull_data, tmp_checksum, objtype, path, TRUE, TRUE);
-
-      /* For commits, check whether we only had a partial fetch */
-      if (!do_scan && objtype == OSTREE_OBJECT_TYPE_COMMIT)
-        {
-          OstreeRepoCommitState commitstate;
-
-          if (!ostree_repo_load_commit (pull_data->repo, tmp_checksum, NULL, &commitstate, error))
-            goto out;
-
-          if (commitstate & OSTREE_REPO_COMMIT_STATE_PARTIAL)
-            {
-              do_scan = TRUE;
-              pull_data->commitpartial_exists = TRUE;
-            }
-          else if (pull_data->maxdepth != 0)
-            {
-              /* Not fully accurate, but the cost here of scanning all
-               * input commit objects if we're doing a depth fetch is
-               * pretty low.  We'll do more accurate handling of depth
-               * when parsing the actual commit.
-               */
-              do_scan = TRUE;
-            }
-        }
+      if (!scan_dirtree_object (pull_data, tmp_checksum, path, recursion_depth,
+                                pull_data->cancellable, error))
+        goto out;
 
-      if (do_scan)
-        {
-          switch (objtype)
-            {
-            case OSTREE_OBJECT_TYPE_COMMIT:
-              if (!scan_commit_object (pull_data, tmp_checksum, recursion_depth,
-                                       pull_data->cancellable, error))
-                goto out;
-              break;
-            case OSTREE_OBJECT_TYPE_DIR_META:
-              break;
-            case OSTREE_OBJECT_TYPE_DIR_TREE:
-              if (!scan_dirtree_object (pull_data, tmp_checksum, path, recursion_depth,
-                                        pull_data->cancellable, error))
-                goto out;
-              break;
-            default:
-              g_assert_not_reached ();
-              break;
-            }
-        }
       g_hash_table_insert (pull_data->scanned_metadata, g_variant_ref (object), object);
       pull_data->n_scanned_metadata++;
     }